import numpy as np
from matplotlib import rcParams
import matplotlib.pyplot as pl
import scanpy.api as sc
import pandas as pd
import matplotlib.pyplot as plt
import episcanpy.api as epi
import anndata as ad
import numpy as np
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.set_figure_params(dpi=100, color_map='viridis') # low dpi (dots per inch) yields small inline figures
sc.logging.print_versions()
#Set working directory
wd = '/path/to/file/sample_pooled_preprocess_revision1/'
direc = wd
sc.settings.figdir = './plots/'
results_file="./write/20190416_snATACseq_embryo_revision01_doublets_cisTopic_50_100_filtered_final.h5ad"
##====== Read counts ======##
filename_data = direc + '11_matrix_afterClusterQC/embryo_revision1_allPeaks_afterClusterPeak.mat.bin.mtx'
filename_gene_names = direc + '11_matrix_afterClusterQC/embryo_revision1_allPeaks_passedQC_peakNames.txt'
filename_barcodes = direc + '11_matrix_afterClusterQC/embryo_revision1_allPeaks_passedQC_barcodeNames.xgi'
#filename_clustersAll = wd + 'data/cellTypes_20180215.txt'
#adata_all_cells = epi.tl.read_ATAC(filename_data, filename_barcodes, filename_gene_names,path_file='')
print('reading counts')
adata_all_cells = sc.read(filename_data, cache=True).transpose()
adata_all_cells.X = adata_all_cells.X.astype(np.int64)
print('reading genes')
adata_all_cells.var_names = np.genfromtxt(filename_gene_names, dtype='str')
print('reading cells')
adata_all_cells.obs_names = np.genfromtxt(filename_barcodes, dtype='str')
adata_all_cells.shape
#adata = sc.read(results_file)
##====== Create colour palette for gene expression profiles ======##
from matplotlib.colors import LinearSegmentedColormap
rmap = LinearSegmentedColormap.from_list(name='gene_cmap',
colors=['lightgrey', 'thistle', 'red', 'darkred'])
cmap = LinearSegmentedColormap.from_list(name='gene_cmap',
colors=["#BFBFBF","#6495ED","#000000"])
cells = np.genfromtxt(direc + "12_barcodeStats_celltypePeaks/embryo_revision1_readsPeaks24.xgi", dtype='str')
adata = adata_all_cells[np.array([str(i) in cells for i in adata_all_cells.obs_names]),:]
adata.shape
from scipy.io import mmread
cistopic = mmread(direc + '13_cisTopic/data/cisTopic_matrix_50_100_afterReadsInPeaksQC_24.mtx')
cistopic = cistopic.todense()
cistopic= cistopic.transpose()
cistopic.shape
adata.obsm['X_pca'] = cistopic
adata.obs=pd.DataFrame(cistopic)
sc.pp.neighbors(adata)
sc.tl.umap(adata,random_state=4)
sc.tl.louvain(adata,random_state=7)
sc.pl.umap(adata, color=[ 'louvain'])
for i in adata.obs:
sc.pl.umap(adata, color=[i])